## Remove all visible (non-dot-prefixed) objects from the current environment
## so the script starts from an empty workspace.
## NOTE(review): this does not detach packages or reset options, and it wipes
## the caller's workspace if the file is source()d from an interactive session.
rm(list = ls())

## Load packages

library(tidyverse)
library(knitr)
library(kableExtra)
library(reshape2)

## Load electoral data and restrict it to the analysis sample:
##   - treatment indicator is not missing
##   - `applies_census` equals 1
##   - `state_id` is not '08'
##   - `pop_dec_09` lies between 5,000 and 15,000 (bounds included)

bt <- readRDS('data/data_federal.rds') %>%
  filter(
    !is.na(treated),
    applies_census == 1,
    state_id != '08',
    between(pop_dec_09, 5000, 15000)
  )

## Load municipal finance data

## Name stems of the finance measures used in the summary table
finvars <- c('spending_total', 'ssz_total', 'current_debt_total')

finances <- mutate(read_rds('data/muni_finance.rds'),
                   state_id = factor(state_id))

## Finance columns to keep: the name must contain one of the stems above
## and one of '12', '14' or '16'.
## NOTE(review): both patterns are unanchored, so '12' matches anywhere in
## the column name -- confirm that exactly the intended columns are picked up.
outcomes_f <- colnames(finances) %>%
  str_subset(paste0(finvars, collapse = '|')) %>%
  str_subset(paste0(c('12', '14', '16'), collapse = '|'))

## Divide finance data by pre-census population (i.e. make per capita)
##
## Every column listed in `outcomes_f` is divided by `bt_pop_2012`.
## lapply() over the selected columns always returns a list of plain vectors,
## so the assignment behaves the same for data.frames and tibbles and for any
## number of rows or columns (sapply() would switch between matrix, list and
## vector results depending on the input).

## Fail early with a clear message if the denominator column is absent
stopifnot('bt_pop_2012' %in% colnames(finances))

finances[outcomes_f] <- lapply(finances[outcomes_f], function(x) {
  x / finances[['bt_pop_2012']]
})

## Keep only the `ags` identifier and the finance columns selected above;
## every other column of `finances` is dropped

finances <- dplyr::select(finances, ags, one_of(outcomes_f))

## Merge the finance columns into the main data frame
##
## The join key is stated explicitly instead of relying on whichever column
## names the two tables happen to share; every row of `bt` is kept.
## NOTE(review): assumes `ags` is the only intended key and that `bt` does not
## already contain the finance columns -- confirm against the input data.

bt <- bt %>%
  left_join(finances, by = 'ags')

## Lists of variables for the table

## Electoral outcomes (year suffixes are added once the data are reshaped)
outcomes <- c(
  "turnout_party",
  "right_total_party",
  "left_total_party"
)

## Population counts
inhabitants <- c(
  "pop_dec_09",
  "pop_post_census_2011"
)

## From here on `finvars` holds the selected finance column names rather
## than the name stems it was initialised with
finvars <- outcomes_f

## Pre-treatment covariates
pretreat_vars <- c(
  "pop_foreign_share",
  "pop_share_65plus_2011",
  "soz_vers_beschaeftigte_share",
  "unem_capita",
  "households_prop_married",
  "households_size_2ormore",
  "residential_share_owneroccupied_2011",
  "residential_share_100sqmplus_2011",
  "residential_share_2000later_2011",
  "pop_density_km2",
  "migration_out_share",
  "euro_per_sqm_county_2012",
  "gdp_capita_county_2012"
)

## Lists of labels for the table
## Each label vector is matched to its variable vector by position.

inhabitants_labels <- c("Population in 2009", "Post-census population (2011)")

## Three measures x three years, the year varying fastest within each measure.
## NOTE(review): this order must agree with the order of the finance columns
## in `finvars`, which follows the column order of the finance file -- confirm.
finvars_labels <- paste0(
  rep(c("Municipal spending (",
        "Transfers to municipalities (",
        "Municipal debt ("),
      each = 3),
  rep(c("2012, EUR/capita)",
        "2014, EUR/capita)",
        "2016, EUR/capita)"),
      times = 3)
)

pretreat_labels <- c(
  "Foreign born / 1000 capita (2011)",
  "Age 65+ / capita (2011)",
  "Employment / capita (2011)",
  "Unemployment / capita (2011)",
  "Households: prop. married couples (2011)",
  "Households: prop. 2+ members (2011)",
  "Residences: prop. owner-occupied (2011)",
  "Residences: prop. 100+ sqm area (2011)",
  "Residences: prop. built 2000 or later (2011)",
  "Population density / km2 (2011)",
  "Out-migration / capita (2011)",
  "Land value (county-level, 2012)",
  "GDP / capita (county-level, 2012)"
)

## Reshape the electoral outcomes: stack them into long format, then spread
## them so that each `ags` has one row and each outcome-year combination has
## its own column, named `<outcome>_<year>`

vote_df <- bt %>%
  dplyr::select(year, ags, one_of(outcomes)) %>%
  pivot_longer(cols = matches('party'),
               names_to = 'name',
               values_to = 'votes') %>%
  pivot_wider(id_cols = ags,
              names_from = c(name, year),
              values_from = votes)

## Redefine the outcome names and labels so that there is one entry per
## outcome-year combination (year varying fastest)

outcomes <- paste0(rep(outcomes, each = 3), '_', c(2009, 2013, 2017))

outcomes_labels <- paste0(
  rep(c('Turnout (',
        'Right party vote share (',
        'Left party vote share ('),
      each = 3),
  c('2009)', '2013)', '2017)')
)

## Create the data frame behind the summary table: the 2013 rows of `bt`
## with the population, covariate and finance columns, plus the wide
## election outcomes for every election year.
##
## The join key is given explicitly: `ags` is the only column the two tables
## share, so this also silences the "Joining, by = ..." message.

table_df <- bt %>%
  dplyr::select(ags, year,
                one_of(c(inhabitants, pretreat_vars, finvars))) %>%
  filter(year == 2013) %>%
  left_join(vote_df, by = 'ags')

## Combine variable lists and label lists, create dictionary df
## (one row per variable, pairing its column name with its display label)

vars <- c(outcomes, inhabitants, finvars,
          pretreat_vars)
labs <- c(outcomes_labels, inhabitants_labels, finvars_labels,
          pretreat_labels)

## Names and labels are paired by position; data.frame() would silently
## recycle the shorter vector if one length were a multiple of the other
stopifnot(length(vars) == length(labs))

label_df <- data.frame(variable = vars, labs, stringsAsFactors = FALSE)

## Get summary statistics
##
## One row per variable with mean, median, SD, min and max (missing values
## ignored), labelled via `label_df` and rounded to two decimals.
##
## melt() stores the column names in a factor whose levels follow the column
## order of `table_df`, so the grouped summary comes out in that same order.
## The row ranges used for the group headers of the table depend on this;
## do not replace melt() with a reshape that yields a character column.

sum_stats <- table_df %>%
  dplyr::select(-ags, -year) %>%
  melt() %>%
  group_by(variable) %>%
  summarise(mean = mean(value, na.rm = TRUE),
            median = median(value, na.rm = TRUE),
            sd = sd(value, na.rm = TRUE),
            min = min(value, na.rm = TRUE),
            max = max(value, na.rm = TRUE)) %>%
  left_join(label_df, by = 'variable') %>%
  ## Keep the label in place of the raw variable name
  dplyr::select(labs, mean, median, sd, min, max) %>%
  mutate_if(is.numeric, round, 2)

# Table A.2: Summary stats ----

## Render the summary statistics as a LaTeX table with four labelled row
## groups. The row ranges assume the row order of `sum_stats`: 2 population
## rows, 13 covariate rows, 9 finance rows and 9 electoral rows.
##
## pack_rows() is kableExtra's name for row grouping; unlike group_rows() it
## does not share its name with a dplyr function, so the result does not
## depend on the order in which the packages were attached.

sumtable <- sum_stats %>%
  kable(format = "latex",
        align = c('l', rep('r', 5)),
        booktabs = TRUE,
        escape = FALSE,
        caption = 'Summary statistics',
        col.names = c("Variable", "Mean",
                      "Median", "SD",
                      "Min", "Max"),
        digits = 2) %>%
  column_spec(1, width = '7.5cm') %>%
  kable_styling(font_size = 10, full_width = FALSE,
                latex_options = c("hold_position",
                                  "repeat_header")) %>%
  pack_rows(group_label = 'Population',
            start_row = 1, end_row = 2) %>%
  pack_rows(group_label = 'Pre-treatment covariates',
            start_row = 3, end_row = 15) %>%
  pack_rows(group_label = 'Municipal finances',
            start_row = 16, end_row = 24) %>%
  pack_rows(group_label = 'Electoral outcomes (%)',
            start_row = 25, end_row = 33)

## Print the LaTeX code
sumtable
